-------------------------------------------------------------------------------
--
--  <STRONG>Copyright &copy; 2001, 2002 by Thomas Wolf.</STRONG>
--  <BLOCKQUOTE>
--    This piece of software is free software; you can redistribute it and/or
--    modify it under the terms of the  GNU General Public License as published
--    by the Free Software  Foundation; either version 2, or (at your option)
--    any later version. This software is distributed in the hope that it will
--    be useful, but <EM>without any warranty</EM>; without even the implied
--    warranty of <EM>merchantability or fitness for a particular purpose.</EM>
--    See the GNU General Public License for  more details. You should have
--    received a copy of the GNU General Public License with this distribution,
--    see file "<A HREF="GPL.txt">GPL.txt</A>". If not, write to the Free
--    Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307,
--    USA.
--  </BLOCKQUOTE>
--  <BLOCKQUOTE>
--    As a special exception from the GPL, if other files instantiate generics
--    from this unit, or you link this unit with other files to produce an
--    executable, this unit does not by itself cause the resulting executable
--    to be covered by the GPL. This exception does not however invalidate any
--    other reasons why the executable file might be covered by the GPL.
--  </BLOCKQUOTE>
--
--  <VERSION ID="1.1">
--
--  <AUTHOR>
--    Thomas Wolf  (TW) <E_MAIL>
--  </AUTHOR>
--
--  <PURPOSE>
--    Various string utilities not provided in the standard library. Some
--    of these also are repeated here, so that one can get all one needs
--    with a single "@with@".
--  </PURPOSE>
--
--  <NOT_TASK_SAFE>
--
--  <NO_STORAGE>
--
--  <HISTORY>
--    01-MAR-2002   TW  Initial version.
--    14-MAR-2002   TW  Added 'Count'.
--    18-MAR-2002   TW  Added 'Letters'.
--    02-MAY-2002   TW  Added 'Identifier'.
--    24-JUN-2002   TW  Added 'Skip_String', 'Shell_Quotes', 'String_Quotes',
--                      'Quote', 'Unquote', 'Next_Non_Blank', and 'Is_Prefix'
--                      and made the exception 'Illegal_Pattern' a renaming.
--    28-JUN-2002   TW  Added 'Unquote_All'.
--    02-AUG-2002   TW  Added 'Replace'.
--    07-AUG-2002   TW  Added 'First_Index', 'Last_Index', and 'Count' with
--                      a string pattern.
--    12-OCT-2002   TW  Added 'Next_Blank'.
--  </HISTORY>
-------------------------------------------------------------------------------

with Ada.Characters.Handling;
with Ada.Strings.Fixed;
with Ada.Strings.Maps;

package Strings is

   pragma Elaborate_Body;

   function To_Lower (Ch : in Character) return Character
     renames Ada.Characters.Handling.To_Lower;

   function To_Upper (Ch : in Character) return Character
     renames Ada.Characters.Handling.To_Upper;

   function To_Lower (S : in String) return String
     renames Ada.Characters.Handling.To_Lower;

   function To_Upper (S : in String) return String
     renames Ada.Characters.Handling.To_Upper;

   function To_Mixed (S : in String) return String;
   --  Maps all character immediately following an underscore ('@_@') or a
   --  period ('@.@') or a white space as defined by @Blanks@ below
   --  to upper case, all others to lower case.

   Forward  : constant Ada.Strings.Direction := Ada.Strings.Forward;
   Backward : constant Ada.Strings.Direction := Ada.Strings.Backward;

   function Count
     (Src : in String;
      Ch  : in Character)
     return Natural;
   --  Returns the number of occurrences of @Ch@ in the string @Src@.

   function Count
     (Source  : in String;
      Pattern : in String)
     return Natural;
   --  As @Ada.Strings.Fixed.Count@, but without mapping and therefore way
   --  faster.

   function Index
     (Src : in String;
      Ch  : in Character;
      Dir : in Ada.Strings.Direction := Forward)
     return Natural;
   --  Returns the index of the first (or last, if @Dir@ is @Backward@)
   --  occurrence of @Ch@ in the string @Src@, or zero if no occurrence
   --  of this character can be found.

   function First_Index
     (Src : in String;
      Ch  : in Character)
     return Natural;
   --  As @Index@, but hard-wired to searching forward.

   function Last_Index
     (Src : in String;
      Ch  : in Character)
     return Natural;
   --  As @Index@, but hard-wired to searching backward.

   function First_Index
     (Source  : in String;
      Pattern : in String)
     return Natural;
   --  As @Index@, but hard-wired to searching forward. Way faster than
   --  @Ada.Strings.Fixed.Index@, also because no mapping is applied.

   function Last_Index
     (Source   : in String;
      Pattern  : in String)
     return Natural;
   --  As @Index@, but hard-wired to searching backward. Way faster than
   --  @Ada.Strings.Fixed.Index@, also because no mapping is applied.

   function Index
     (Source  : in String;
      Pattern : in String;
      Dir     : in Ada.Strings.Direction := Forward)
     return Natural;
   --  As @Ada.Strings.Fixed.Index@, but hard-wired to not using a mapping.

   function Is_Prefix
     (Source : in String;
      Prefix : in String)
     return Boolean;
   --  Returns @True@ if @Source@ starts with @Prefix@, @False@ otherwise.

   function Is_Suffix
     (Source : in String;
      Suffix : in String)
     return Boolean;
   --  Returns @True@ if @Source@ ends with @Suffix@, @False@ otherwise.

   Blanks   : constant Ada.Strings.Maps.Character_Set;
   --  Anything that can be considered white space: not just a blank, but
   --  also tabs, non-breaking spaces, carriage returns, and so on.

   Letters  : constant Ada.Strings.Maps.Character_Set;
   --  7-bit ASCII letters, i.e. A-Z and a-z.

   Null_Set : constant Ada.Strings.Maps.Character_Set :=
     Ada.Strings.Maps.Null_Set;

   function Is_Blank
     (Ch : in Character)
     return Boolean;
   --  Returns <CODE>Ada.Strings.Maps.Is_In (Ch, Blanks)</CODE>.

   function Is_In
     (Set : in Ada.Strings.Maps.Character_Set;
      Ch  : in Character)
     return Boolean;
   --  Returns <CODE>Ada.Strings.Maps.Is_In (Ch, Set)</CODE>. Provided
   --  mainly because I very often mix up the order of the arguments.

   Left  : constant Ada.Strings.Trim_End := Ada.Strings.Left;
   Right : constant Ada.Strings.Trim_End := Ada.Strings.Right;
   Both  : constant Ada.Strings.Trim_End := Ada.Strings.Both;

   function Trim
     (S    : in String;
      Side : in Ada.Strings.Trim_End := Both)
     return String;
   --  Removes all characters in @Blanks@ declared above from the
   --  specified string end.

   function Trim
     (S     : in String;
      Left  : in Ada.Strings.Maps.Character_Set;
      Right : in Ada.Strings.Maps.Character_Set := Null_Set)
     return String
     renames Ada.Strings.Fixed.Trim;
   --  Removes the specified character sets. The point of this renaming is
   --  the default parameter.

   No_Escape : constant Character := Character'Val (0);
   --  This constant is used to indicate to the string parsing operations
   --  @Get_String@ and @In_String@ that string delimiters cannot be escaped.

   Shell_Quotes  : constant Ada.Strings.Maps.Character_Set;
   --  Quotes typically recognized by command shells: double, single, and
   --  back quote.

   String_Quotes : constant Ada.Strings.Maps.Character_Set;
   --  Typical string quotes: double and single quotes.

   procedure Get_String
     (S        : in     String;
      From, To :    out Natural;
      Delim    : in     Character := '"';
      Escape   : in     Character := No_Escape);
   --  Returns in @From@ and @To@ the indices of the beginning or end of the
   --  next string in @S@.
   --
   --  A string is defined as a sequence of characters enclosed by @Delim@;
   --  any occurrences of @Delim@ after the first @Delim@ that are
   --  immediately preceeded by @Escape@ do not yet terminate the string
   --  but are part of the string's content.
   --
   --  <TABLE BORDER=0>
   --  <TR><TH>@Escape@</TH><TH></TH></TR>
   --  <TR><TD NOWRAP VALIGN="TOP"><CODE>
   --  /= Delim
   --  </CODE></TD><TD>
   --  Delimiters that are part of the string must follow an
   --  @Escape@ immediately. Two @Escape@s in a row
   --  are considered one literal @Escape@. For instance, with
   --  <CODE>Delim = '"'</CODE> and <CODE>Escape = '\'</CODE>, the operation
   --  recognizes C strings.
   --  </TD></TR>
   --  <TR><TD NOWRAP VALIGN="TOP"><CODE>
   --  = Delim
   --  </CODE></TD><TD>
   --  Delimiters that are part of the string must be doubled, an in Ada
   --  strings.
   --  </TD></TR>
   --  <TR><TD NOWRAP VALIGN="TOP"><CODE>
   --  = No_Escape
   --  </CODE></TD><TD>
   --  Strings cannot contain instances of the delimiter. The second
   --  occurrence of a delimiter in @S@ is the string end.
   --  </TD></TR></TABLE>
   --
   --  If no string is found, both @From@ and @To@ are zero.
   --
   --  If an unterminated string is found, @From@ is the index of
   --  the opening occurrence of @Delim@, and @To@ is zero.
   --
   --  Otherwise, a string was found, and @From@ and @To@ are the indices of
   --  the opening and closing occurrences of @Delim@, respectively.

   function In_String
     (S      : in String;
      Delim  : in Character := '"';
      Escape : in Character := No_Escape)
     return Boolean;
   --  Returns @True@ if the end of @S@ is within an unterminated "string"
   --  (as described above), and @False@ otherwise. (If @S@ ends with
   --  an unterminated string, returns @True@, otherwise @False@.)

   function Skip_String
     (S      : in String;
      Delim  : in Character := '"';
      Escape : in Character := No_Escape)
     return Natural;
   --  Returns the index of the closing occurrence of @Delim@ of the string
   --  in @S@. <CODE>S (S'First)</CODE> should be the opening occurrence of
   --  @Delim@. The semantics of @Delim@ and @Escape@ are as for @Get_String@.
   --
   --  Returns zero if co closing occurrence of @Delim@ can be found in @S@.

   function Quote
     (S      : in String;
      Delim  : in Character;
      Escape : in Character)
     return String;
   --  Quote a string. @S@ is supposed to contain the string's contents
   --  (without the delimiters). Any embedded delimiter is quoted as follows:
   --
   --  <UL>
   --    <LI>If <CODE>Escape = No_Escape</CODE>, @S@ is returned.
   --    <LI>If <CODE>Escape = Delim</CODE>, all occurrences of @Delim@ in
   --        @S@ are replaced by <EM>two</EM> @Delim@s.
   --    <LI>Otherwise, an @Escape@ is inserted before any occurrence of
   --        @Delim@ or @Escape@ in @S@.
   --  </UL>

   function Unquote
     (S      : in String;
      Delim  : in Character;
      Escape : in Character)
     return String;
   --  Unquotes embedded delimiters in a string. @S@ is supposed to contain
   --  the string's contents without the bounding delimiters.
   --
   --  <UL>
   --    <LI>If <CODE>Escape = No_Escape</CODE>, @S@ is returned.
   --    <LI>If <CODE>Escape = Delim</CODE>, all non-overlapping occurrences
   --        of two consecutive @Delim@s in @S@ are replaced by a single
   --        @Delim@.
   --    <LI>Otherwise, any non-overlapping occurrence of two @Escape@s in
   --        @S@ is replaced by a single @Escape@, and any occurrence of an
   --        @Escape@ immediately followed by a @Delim@ is replaced by a
   --        single @Delim@.
   --  </UL>
   --
   --  In all cases, the following is true:
   --  <PRE>
   --    Unquote (Quote (S, Delim, Escape), Delim, Escape) = S
   --  </PRE>

   function Unquote_All
     (S      : in String;
      Quotes : in Ada.Strings.Maps.Character_Set;
      Escape : in Character := No_Escape)
     return String;
   --  Unquotes all non-overlapping occurrences of strings within @S@
   --  delimited by any character in @Quotes@. If @Escape@ = @No_Escape@,
   --  the Ada convention (embedded delimiters must be doubled) is assumed,
   --  otherwise, embedded delimiters must be escaped by @Escape@.

   ----------------------------------------------------------------------------

   function Identifier
     (S : in String)
     return Natural;
   --  If @S@ starts with an identifier, returns the index of the identifier's
   --  last character. Otherwise, returns zero. For the purpose of this
   --  function, an identifier has the following syntax:
   --
   --  <PRE>
   --     Identifier = Letter {Letter | Digit | '_'}.
   --     Letter     = 'A' .. 'Z' | 'a' ..'z'.
   --     Digit      = '0' .. '9'.
   --  </PRE>
   --
   --  Note that this is the Ada 95 syntax, except that multiple underscores
   --  in a row are allowed.

   function Next_Non_Blank
     (S : in String)
     return Natural;
   --  Returns the index of the first character in @S@ such that
   --  <CODE>Is_Blank (S (I)) = False</CODE>, or zero if no such character
   --  exists in @S@.

   function Next_Blank
     (S : in String)
     return Natural;
   --  Returns the index of the first character in @S@ for which
   --  <CODE>Is_Blank (S (I)) = True</CODE>, or zero if there is no such
   --  character in @S@.

   function Replace
     (Source : in String;
      What   : in String;
      By     : in String)
     return String;
   --  Replaces all non-overlapping occurrences of @What@ in @Source@ by @By@.
   --  Occurrences of @What@ in @By@ are <EM>not</EM> replaced recursively,
   --  as this would lead to an infinite recursion anyway.

   ----------------------------------------------------------------------------

   No_Set_Inverter      : constant Character := Character'Val (0);

   Illegal_Pattern      : exception renames Ada.Strings.Pattern_Error;
   --  Raised by @Wildcard_Match@ if a pattern is malformed.

   generic
      Any_One      : in Character := '?';
      Zero_Or_More : in Character := '*';
      Set_Inverter : in Character := '!';
      Has_Char_Set : in Boolean   := True;
      Has_Escape   : in Boolean   := True;
      Zero_Or_One  : in Boolean   := False;
   function Wildcard_Match
     (Pattern : in String;
      Text    : in String)
     return Boolean;
   --  Returns @True@ if the wildcard string @Pattern@ matches the text
   --  @Text@, and @False@ otherwise. Raises @Illegal_Pattern@ if the
   --  pattern is malformed.
   --
   --  <STRONG>Wildcard patterns</STRONG> are a simple form of regular
   --  expressions. Their syntax is as follows: (This description assumes
   --  the default values for all generic parameters.)
   --
   --  <TABLE BORDER=0>
   --  <TR><TD VALIGN="TOP">@?@</TD>
   --      <TD>Matches any one character.</TD></TR>
   --  <TR><TD VALIGN="TOP">@*@</TD>
   --      <TD>Matches any sequence of characters (zero or more).</TD></TR>
   --  <TR><TD VALIGN="TOP">@[...]@</TD>
   --      <TD>The characters between the square brackets define a character
   --          set. Matches any one character of the given set.</TD></TR>
   --  <TR><TD VALIGN="TOP">@[!...]@</TD>
   --      <TD>Defines an inverted set. Matches any one character <EM>not</EM>
   --          listed.</TD></TR>
   --  </TABLE>
   --
   --  Character sets are given either by specifying a range ("a-z"), single
   --  characters ("xyz") or any combination of the two ("a-zA-Z0123"). If the
   --  first character in the set is '!', the set is inverted, i.e. it contains
   --  all characters not listed.
   --
   --  Any character that is not one of the meta characters '@?@', '@*@',
   --  '@[@', '@]@', and '@\@' matches literally. To do a literal match against
   --  any meta character, escape it with a backslash, or use a one-character
   --  character set.
   --
   --  @\?@ or @[?]@ matches a ?<BR>
   --  @\*@ or @[*]@ matches a *<BR>
   --  @\[@ or @[[]@ matches a [<BR>
   --  @\]@ or @[]]@ matches a ]<BR>
   --  @\\@ or @[\]@ matches a \<BR>
   --
   --  In a character set, characters must not and need not be escaped. To
   --  include the character '@!@' in a character set, make sure it is not the
   --  character immediately following the '@[@'. To include '@]@' in a
   --  character set, make sure it follows the opening '@[@' (or the opening
   --  "@[!@" in the case of an inverted set) immediately. To include '@-@'
   --  in a character set, make it either the first or last character of the
   --  set, or the lower or upper bound of a range, e.g. "@[-a-z]@", or
   --  "@[abc-]@", or "@[ab --9]@", or "@[!-./]@".
   --
   --  (Note that in "@[ab --9]@", the set is '@a@' or '@b@' or (' ' to '@-@')
   --  or '@9@', not '@a@' or '@b@' or ' ' or ('@-@' to '@9@'), i.e. the
   --  earliest interpretation of a range is taken. Also note that the set
   --  "@[abc--9]@" is illegal because in the range "@c--@", '@c@' > '@-@'.
   --  Specify this set as "@[--9abc]@" instead.)
   --
   --  The '@!@' used for set inversion matches literally when used outside a
   --  character set. It is a meta character only when immediately following
   --  the opening '@[@' of a character set.
   --
   --  Note that by default '@?@' matches any <EM>one</EM> character, not zero
   --  or one!
   --
   --  Matches always are <EM>case sensitive</EM>. To do a case
   --  <EM>in</EM>sensitive match, map upper-case letter to lower-case
   --  letters in both the text and the pattern before calling this routine.
   --
   --  Note: if character sets are not allowed, they match literally. E.g.
   --  the pattern "@[abc]@" would then match the text "@[abc]@", but not
   --  "@a@".
   --
   --  <STRONG>Generic Parameters:</STRONG>
   --  <TABLE BORDER=0><TR><TD VALIGN="TOP"><CODE>
   --  Any_One</CODE></TD>
   --  <TD>
   --  The character used to match any one arbitrary text character. If
   --  @Zero_Or_One@ (see below) is True, this character matches
   --  zero or one arbitrary characters.
   --  </TD></TR><TR><TD VALIGN="TOP"><CODE>
   --  Zero_Or_More</CODE></TD>
   --  <TD>
   --  The character used to match zero or more arbitrary characters.
   --  </TD></TR><TR><TD VALIGN="TOP"><CODE>
   --  Set_Inverter</CODE></TD>
   --  <TD>
   --  The character used for inverting a character set. If it is
   --  @No_Set_Inverter@, but @Has_Char_Set@ (see below) is @True@,
   --  character sets cannot be inverted. If @Has_Char_Set@ is @False@,
   --  @Set_Inverter@ is ignored.
   --  </TD></TR><TR><TD VALIGN="TOP"><CODE>
   --  Has_Char_Set</CODE></TD>
   --  <TD>
   --  If @True@, character sets are supported, otherwise, they're not
   --  allowed and the set meta characters '@[@' and '@]@' always match
   --  literally. (Note that the set inverter (by default '@!@') always
   --  matches literally if used outside a character set.)
   --  </TD></TR><TR><TD VALIGN="TOP"><CODE>
   --  Has_Escape</CODE></TD>
   --  <TD>
   --  If @True@, backslash-escaping of meta characters is supported. If
   --  @False@, it is not, and one-character character sets must be used
   --  for literal matches against meta characters.
   --  </TD></TR><TR><TD VALIGN="TOP"><CODE>
   --  Zero_Or_One</CODE></TD>
   --  <TD>
   --  If @True@, the @Any_One@ character matches zero or one text
   --  characters. If @False@, @Any_One@ <EM>must</EM> match a text
   --  character.
   --  </TD></TR></TABLE>
   --
   --  The three characters used for @Any_One@, @Zero_Or_More@ and
   --  @Set_Inverter@ should of course be distinct, and not coincide with
   --  any of the other meta characters either!
   --
   --  Note that character sets always must match a character; a null match is
   --  never allowed. (If null matches were allowed, a pattern like "@[!a]*@"
   --  would also match texts starting with "@a@"!)

   function Match
     (Pattern : in String;
      Text    : in String)
     return Boolean;
   --  A default instantiation of the above @Wildcard_Match@.

private

   Blanks   : constant Ada.Strings.Maps.Character_Set :=
     Ada.Strings.Maps.To_Set
       (Ada.Strings.Maps.Character_Ranges'
         (1 => (Character'Val (0), ' '),
          2 => (Character'Val (127), Character'Val (159))));

   Letters : constant Ada.Strings.Maps.Character_Set :=
     Ada.Strings.Maps.To_Set ("ABCDEFGHIJKLMNOPQRSTUVWXYZ" &
                              "abcdefghijklmnopqrstuvwxyz");

   Shell_Quotes  : constant Ada.Strings.Maps.Character_Set :=
     Ada.Strings.Maps.To_Set ("'""`");

   String_Quotes : constant Ada.Strings.Maps.Character_Set :=
     Ada.Strings.Maps.To_Set ("'""");

   pragma Inline (Is_Blank, Is_In, Is_Prefix, Is_Suffix,
                  First_Index, Last_Index);

end Strings;
